From 9c739800a8915d5f2a73c840190920e95ffa1c5c Mon Sep 17 00:00:00 2001
From: Reini Urban <rurban@cpan.org>
Date: Fri, 18 Feb 2022 09:46:45 +0100
Subject: [PATCH] fix armv7 asm inline error GH #115

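Some armv7 buildroot variants fail to compile the inline asm statements.
The build already probes for a working inline-asm keyword (ASM_INLINE),
so use it everywhere and fall back to clock() when it is not defined.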
Fixes GH #115

[Retrieved from:
https://github.com/rurban/safeclib/commit/9c739800a8915d5f2a73c840190920e95ffa1c5c]
Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
---
 tests/perf_private.h | 49 +++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/tests/perf_private.h b/tests/perf_private.h
index 3296cb3d..843674d3 100644
--- a/tests/perf_private.h
+++ b/tests/perf_private.h
@@ -1,9 +1,9 @@
 /*------------------------------------------------------------------
  * perf_private.h - Internal benchmarking tools
  *
- * 2020  Reini Urban
+ * 2020,2022  Reini Urban
  *
- * Copyright (c) 2017, 2020 Reini Urban
+ * Copyright (c) 2017, 2020, 2022 Reini Urban
  * All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
@@ -55,13 +55,16 @@ static inline uint64_t timer_start();
 static inline uint64_t timer_end();
 
 static inline clock_t rdtsc() {
-#ifdef __x86_64__
+#ifndef ASM_INLINE
+#define NO_CYCLE_COUNTER
+    return clock();
+#elif defined __x86_64__
     uint64_t a, d;
-    __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
+    ASM_INLINE volatile("rdtsc" : "=a"(a), "=d"(d));
     return (clock_t)(a | (d << 32));
 #elif defined(__i386__)
     clock_t x;
-    __asm__ volatile("rdtsc" : "=A"(x));
+    ASM_INLINE volatile("rdtsc" : "=A"(x));
     return x;
 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && (SIZEOF_SIZE_T == 4)
   // V7 is the earliest arch that has a standard cyclecount (some say 6)
@@ -69,11 +72,11 @@ static inline clock_t rdtsc() {
   uint32_t pmuseren;
   uint32_t pmcntenset;
   // Read the user mode perf monitor counter access permissions.
-  asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
+  ASM_INLINE volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
   if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
-    asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
+    ASM_INLINE volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
     if (pmcntenset & 0x80000000ul) {  // Is it counting?
-      asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
+      ASM_INLINE volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
       // The counter is set up to count every 64th cycle
       return (int64_t)(pmccntr) * 64;  // Should optimize to << 6
     }
@@ -83,22 +86,22 @@ static inline clock_t rdtsc() {
   uint64_t pmccntr;
   uint64_t pmuseren = 1UL;
   // Read the user mode perf monitor counter access permissions.
-  //asm volatile("mrs cntv_ctl_el0,  %0" : "=r" (pmuseren));
+  //ASM_INLINE volatile("mrs cntv_ctl_el0,  %0" : "=r" (pmuseren));
   if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
-    asm volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
+    ASM_INLINE volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
     return (uint64_t)(pmccntr) * 64;  // Should optimize to << 6
   }
   return (uint64_t)rdtsc();
 #elif defined(__powerpc64__) || defined(__ppc64__)
     uint64_t tb;
-    __asm__ volatile (\
+    ASM_INLINE volatile (\
       "mfspr %0, 268"
       : "=r" (tb));
     return tb;
 #elif defined(__powerpc__) || defined(__ppc__)
     // This returns a time-base, which is not always precisely a cycle-count.
     uint32_t tbu, tbl, tmp;
-    __asm__ volatile (\
+    ASM_INLINE volatile (\
       "0:\n"
       "mftbu %0\n"
       "mftbl %1\n"
@@ -109,12 +112,12 @@ static inline clock_t rdtsc() {
     return (((uint64_t) tbu << 32) | tbl);
 #elif defined(__sparc__)
     uint64_t tick;
-    asm(".byte 0x83, 0x41, 0x00, 0x00");
-    asm("mov   %%g1, %0" : "=r" (tick));
+    ASM_INLINE(".byte 0x83, 0x41, 0x00, 0x00");
+    ASM_INLINE("mov   %%g1, %0" : "=r" (tick));
     return tick;
 #elif defined(__ia64__)
     uint64_t itc;
-    asm("mov %0 = ar.itc" : "=r" (itc));
+    ASM_INLINE("mov %0 = ar.itc" : "=r" (itc));
     return itc;
 #else
 #define NO_CYCLE_COUNTER
@@ -126,9 +129,11 @@ static inline clock_t rdtsc() {
 // 3.2.1 The Improved Benchmarking Method
 static inline uint64_t timer_start()
 {
-#if defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
+#ifndef ASM_INLINE
+    return (uint64_t)rdtsc();
+#elif defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
   uint32_t cycles_high, cycles_low;
-  __asm__ volatile
+  ASM_INLINE volatile
       ("cpuid\n\t"
        "rdtsc\n\t"
        "mov %%edx, %0\n\t"
@@ -137,7 +142,7 @@ static inline uint64_t timer_start()
     return ((uint64_t)cycles_high << 32) | cycles_low;
 #elif defined __x86_64__
   uint32_t cycles_high, cycles_low;
-  __asm__ volatile
+  ASM_INLINE volatile
       ("cpuid\n\t"
        "rdtsc\n\t"
        "mov %%edx, %0\n\t"
@@ -151,9 +156,11 @@ static inline uint64_t timer_start()
 
 static inline uint64_t timer_end()
 {
-#if defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
+#ifndef ASM_INLINE
+    return (uint64_t)rdtsc();
+#elif defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
   uint32_t cycles_high, cycles_low;
-  __asm__ volatile
+ ASM_INLINE volatile
       ("rdtscp\n\t"
        "mov %%edx, %0\n\t"
        "mov %%eax, %1\n\t"
@@ -162,7 +169,7 @@ static inline uint64_t timer_end()
     return ((uint64_t)cycles_high << 32) | cycles_low;
 #elif defined __x86_64__
   uint32_t cycles_high, cycles_low;
-  __asm__ volatile
+  ASM_INLINE volatile
       ("rdtscp\n\t"
        "mov %%edx, %0\n\t"
        "mov %%eax, %1\n\t"
